
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words demo: fit a CountVectorizer on three paper titles, print the
# resulting document-term count matrix and the learned vocabulary, then
# encode a fourth, previously unseen title with the same vocabulary.
count_vec = CountVectorizer()

sent1 = 'Trust Mechanism in Distributed Access Control Model of P2P Networks'

sent2 = 'An Approximate Method for Performance Evaluation of Asynchronous Pipeline Rings'

sent3 = 'A pharmacogenomic study on the polymorphic gene response to risperidone in schizophrenia patients'

sentences = [sent1, sent2, sent3]

# One row per sentence, one column per vocabulary term; each cell is the
# number of times that term occurs in that sentence.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(count_vec.fit_transform(sentences).toarray())

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is its replacement. Prefer the new method and fall
# back to the old one so the script runs on either side of that change.
if hasattr(count_vec, 'get_feature_names_out'):
    # The new method returns an ndarray; convert it so a plain list is printed.
    feature_names = count_vec.get_feature_names_out().tolist()
else:
    feature_names = count_vec.get_feature_names()
print(feature_names)

sent4 = 'Function Call Flow based Fitness Function Design in Evolutionary Testing'

# transform() reuses the vocabulary learned by the fit above; words of sent4
# that are not in that vocabulary get no column in the output row.
print(count_vec.transform([sent4]).toarray())


"""
https://blog.csdn.net/papaaa/article/details/78821631
TfidfVectorizer is equivalent to CountVectorizer followed by TfidfTransformer.
"""

